#group by state then summarize costs and drop any na field values
task1.1 <- bmps %>% group_by(StateAbbreviation) %>%
summarize(stateCost = sum(Cost, na.rm = TRUE))
task1.1
## # A tibble: 7 x 2
## StateAbbreviation stateCost
## <chr> <dbl>
## 1 DC 15623736.
## 2 DE 41329130.
## 3 MD 627164538.
## 4 NY 15273818.
## 5 PA 562922554.
## 6 VA 723866417.
## 7 WV 83023958.
#log transform skewed data
bmps$logCost <- log(bmps$Cost)
bmps$logTotalAmountCredited <- log(bmps$TotalAmountCredited)
#filter dataset to rows where units are acres and plot log transformed data
task1.2 <- bmps %>% filter(., Unit == "Acres") %>%
ggplot(., aes(x=logCost, y=logTotalAmountCredited)) +
geom_point()
task1.2
## Warning: Removed 170 rows containing missing values (geom_point).
#see unique BMPs for cover crops to get idea of spelling
uni.bmps <- c(unique(bmps$BMPShortName))
uni.bmps
## [1] "wetpondwetland" "dryponds"
## [3] "filter" "rr"
## [5] "bioretudab" "bioswale"
## [7] "vegopchannoudab" "permpavnosvnoudab"
## [9] "bioretudcd" "urbstrmrest"
## [11] "infiltration" "impsurred"
## [13] "st" "urbantreeplant"
## [15] "scp1" "scp2"
## [17] "scp3" "scp4"
## [19] "scp5" "scp6"
## [21] "csoconnect" "eands1"
## [23] "conplan" "forestbuffers"
## [25] "treeplant" "landretireopen"
## [27] "grassbuffers" "oswnofence"
## [29] "precrotgrazing" "watercontstruc"
## [31] "extdryponds" "infiltwithsv"
## [33] "forharvestbmp" "covercroptradwed"
## [35] "covercroptradweo" "barnrunoffcont"
## [37] "covercroptradwnd" "covercroptradwno"
## [39] "covercroptradwlo" "covercroptradbed"
## [41] "covercroptradbeo" "covercroptradbea"
## [43] "covercroptradbno" "covercropcomearly"
## [45] "covercropcomnormal" "covercropcomlate"
## [47] "conservetill" "nmcoren"
## [49] "nmcorep" "grassbuffexcl"
## [51] "covercroptradtno" "hrtill"
## [53] "septicdecon" "septicconnect"
## [55] "septicpump" "wetlandrestorefloodplain"
## [57] "grassbuffexcleffn" "grassbufferseffn"
## [59] "forestbufferseffn" "forestbufferseffps"
## [61] "grassbufferseffps" "grassbuffexcleffps"
## [63] "wetlandrestoreeff" "forestbufurban"
## [65] "nonurbstrmrest" "covercroptradreo"
## [67] "covercroptradrea" "covercroptradrnd"
## [69] "covercroptradrno" "covercroptradrld"
## [71] "covercroptradrlo" "covercroptradwea"
## [73] "covercroptradwld" "covercroptradbnd"
## [75] "covercroptradfeo" "covercroptradfea"
## [77] "covercroptradleo" "covercroptradlea"
## [79] "covercroptradted" "covercroptradteo"
## [81] "covercroptradtea" "covercroptradtld"
## [83] "covercroptradtlo" "covercroptradarea"
## [85] "scp11" "covercroptradokeo"
## [87] "covercroptradbreo" "covercroptradbrea"
## [89] "forestbufurbaneff" "advancedgi"
## [91] "grassbuffexclnar" "landretirepas"
## [93] "forestbuffexcl" "horsepasman"
## [95] "imperviousdisconnection" "covercroptradred"
## [97] "urbanforplant" "loaflot"
## [99] "injection" "incorplowlate"
## [101] "urbannmmdca" "urbannmmddiy"
## [103] "covercroptradtnd" "eands2"
## [105] "forestbuffexcleffn" "forestbuffexcleffps"
## [107] "covercroptradohed" "covercroptradohea"
## [109] "covercroptradbred" "wetlandcreatefloodplain"
## [111] "wetlandcreateeff" "forestbuffexclnar"
## [113] "shoreurb" "dirtgraveldsa"
## [115] "shoreag" "covercroptradared"
## [117] "covercroptradohnd" "carseqaltcrop"
## [119] "abanminerec" "covercroptradnutrnd"
## [121] "lowrestill" "nmraten"
## [123] "nmratep" "nmplacen"
## [125] "nmplacep" "nmtimen"
## [127] "nmtimep" "forestbuffnarrow"
## [129] "grassbuffnarrow" "covercroptradnutrno"
## [131] "wetlandenhance" "wetlandenhanceeff"
## [133] "shoreagnoveg" "shoreurbnoveg"
## [135] "covercroptradlnd" "urbannmplan"
## [137] "permpavnosvudcd" "septicsecenhance"
## [139] "septicseccon" "vegopchannoudcd"
## [141] "permpavsvnoudab" "permpavsvudcd"
## [143] "covercroptradnutweo" "septicdeenhance"
## [145] "bioretnoudab" "septiceffenhance"
## [147] "shoreurbveg" "permpavsvudab"
## [149] "permpavnosvudab" "urbfilterrr"
## [151] "urbfilterst" "covercroptradnutreo"
## [153] "covercroptradnutwno" "covercroptradfed"
## [155] "covercroptradlgled" "covercroptradlgleo"
## [157] "covercroptradled" "covercroptradfped"
## [159] "covercroptradfpnd" "covercroptradlglnd"
#filter BMPShortName to rows with cover in them, checked using uni.bmps
task1.3 <- bmps %>% filter(., stringr::str_detect(BMPShortName, "cover")
& TotalAmountCredited > 50 #remove skew from all 0's
& TotalAmountCredited < 1000) %>% #makes boxplot more readable
ggplot(., aes(x=StateAbbreviation, y=TotalAmountCredited)) + #plot
geom_boxplot()
task1.3
#use filter to remove rows where where year is 0
task1.4 <- dams %>% as_tibble() %>% #convert to tibble for swifter data handling
filter(., YEAR != 0) %>% # filter not equal to 0
ggplot(., aes(x=YEAR, y=STATE)) + #plot
geom_point(aes(color = OWNER_CODE)) #color by owner code
task1.4
#sum length of streams per HUC4 code
strm.len <- streams %>% as_tibble() %>% #convert to tibble
filter(., HUC4 != "0206") %>% #no 0206 dams and stream lenghts are minimal
group_by(HUC4) %>% #goup by HUC4 code
summarise(., totLen_KM = sum(LengthKM)) %>% #sum stream lengths
ggplot(., aes(x=HUC4, y=totLen_KM)) +
geom_col(aes(fill = HUC4), show.legend = FALSE) + #don't show legend as legend from dam plot will suffice
labs(y= "Total Length of Streams (km)",
caption = "HUC4 code 0206 removed due to lack of dams in basin")
#count of dams per HUC4 code
dam.count <- dams %>% as_tibble() %>% #convert to tibble
filter(., !is.na(HUC4)) %>% #drop HUC4 is NA
group_by(HUC4) %>% #group by HUC4 code
tally() %>% #tally counts of HUC4
ggplot(., aes(x=HUC4, y=n)) +
geom_col(aes(fill = HUC4)) +
labs(y= "Dam Quantity",
caption = "No dams recorded in HUC4 0206")
#plot both plots in grid
task1.5 <- gridExtra::grid.arrange(strm.len, dam.count, nrow=1, top = "Correlation between stream length released and number of removed dams")
task1.5
## TableGrob (2 x 2) "arrange": 3 grobs
## z cells name grob
## 1 1 (2-2,1-1) arrange gtable[layout]
## 2 2 (2-2,2-2) arrange gtable[layout]
## 3 3 (1-1,1-2) arrange text[GRID.text.320]
###assume lines have no lengths to make spatial
#check crs of streams to see unit length
st_crs(streams) #length units = meters
## Coordinate Reference System:
## User input: WGS 84
## wkt:
## GEOGCRS["WGS 84",
## DATUM["World Geodetic System 1984",
## ELLIPSOID["WGS 84",6378137,298.257223563,
## LENGTHUNIT["metre",1]]],
## PRIMEM["Greenwich",0,
## ANGLEUNIT["degree",0.0174532925199433]],
## CS[ellipsoidal,2],
## AXIS["latitude",north,
## ORDER[1],
## ANGLEUNIT["degree",0.0174532925199433]],
## AXIS["longitude",east,
## ORDER[2],
## ANGLEUNIT["degree",0.0174532925199433]],
## ID["EPSG",4326]]
#calculate length of each line in km
streams$len_KM <- as.vector(sf::st_length(streams) / 1000) #1000 m per km
#filter out NA, then sum stream lengths
task2.1 <- streams %>% filter(., !is.na(GNIS_Name)) %>% #remove NA since we don't know what stream they are from
group_by(GNIS_Name) %>% #Group by stream name
summarise(totStreamLen_KM = sum(len_KM)) %>% #sum length per stream
slice_max(n=5, order_by = totStreamLen_KM) #slice 5 longest streams
task2.1
## Simple feature collection with 5 features and 2 fields
## Geometry type: GEOMETRY
## Dimension: XY
## Bounding box: xmin: -79.20828 ymin: 37.96212 xmax: -77.96634 ymax: 40.43248
## Geodetic CRS: WGS 84
## # A tibble: 5 x 3
## GNIS_Name totStreamLen_KM geometry
## <chr> <dbl> <GEOMETRY [°]>
## 1 Hazel River 45.4 LINESTRING (-78.24825 38.60204, -78.24823~
## 2 South River 44.8 LINESTRING (-79.20828 37.96331, -79.20808~
## 3 Great Trough Creek 43.5 MULTILINESTRING ((-78.13179 40.16831, -78~
## 4 Hughes River 22.5 MULTILINESTRING ((-78.24041 38.53904, -78~
## 5 Little Trough Creek 22.1 LINESTRING (-77.96634 40.43248, -77.9664 ~
# intersect streams and counties, group by county, sum stream length and slice
task2.2 <- sf::st_intersection(streams, counties) %>%
as_tibble() %>% #convert to tibble for faster processing
group_by(GEOID10) %>% # group by county fips code
summarise(totStrmLen = sum(LengthKM)) %>% #calculate total stream length per county
slice_max(n=3, order_by = totStrmLen) #slice highest three total stream length
## Warning: attribute variables are assumed to be spatially constant throughout all
## geometries
task2.2
## # A tibble: 3 x 2
## GEOID10 totStrmLen
## <chr> <dbl>
## 1 51015 425.
## 2 42061 309.
## 3 42023 242.
#create GEOID column in bmps from subset GeographyName
bmps$GEOID = stringr::str_sub(bmps$GeographyName, 1, 5)
#sum cost of bmps per geoid
tot.cost <- bmps %>% filter(., Cost > 0) %>% #filter out values of 0
group_by(GEOID) %>% #GEOID to get a single value to join to counties later
summarise(., TotalBMPCosts = sum(Cost))
#join tot.cost to counties on GEOID, drop counties with no BMP costs
task2.3 <- left_join(counties, tot.cost, by = c("GEOID10"="GEOID")) %>%
filter(., TotalBMPCosts > 0)
tm_shape(task2.3) + tm_polygons(col = "TotalBMPCosts")